home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
World of Education
/
World of Education.iso
/
world_p
/
pcshx10b.zip
/
PCSHX10B.EXE
/
UTILS.EXE
/
UTILDOCS.EXE
/
SEDEXEC.C
< prev
Wrap
C/C++ Source or Header
|
1987-12-25
|
34KB
|
883 lines
/* sedexec.c -- execute compiled form of stream editor commands
The single entry point of this module is the function execute(). It
may take a string argument (the name of a file to be used as text) or
the argument NULL which tells it to filter standard input. It executes
the compiled commands in cmds[] on each line in turn.
The function command() does most of the work. Match() and advance()
are used for matching text against precompiled regular expressions and
dosub() does right-hand-side substitution. Getline() does text input;
readout() emits the text queued by a and r commands; string comparison
uses the standard library memcmp().
==== Written for the GNU operating system by Eric S. Raymond ==== */
#include <stdlib.h>
#include <stdio.h> /* {f}puts, {f}printf, getc/putc, f{re}open, fclose */
#include <ctype.h> /* for isprint(), isdigit(), toascii() macros */
#include <string.h>
#include "sed.h" /* command type structures & miscellaneous constants */
/***** shared variables imported from the main ******/
/* main data areas */
extern char linebuf[]; /* current-line buffer */
extern sedcmd cmds[]; /* hold compiled commands */
extern long linenum[]; /* numeric-addresses table */
/* miscellaneous shared variables */
extern int nflag; /* -n option flag */
extern int eargc; /* scratch copy of argument count */
extern sedcmd *pending; /* ptr to command waiting to be executed */
extern char bits[]; /* the bits table */
/***** end of imported stuff *****/
/* SetIgflag: load the per-command ignore-case setting into igflag */
#define SetIgflag(c) igflag = c
/* RiteCase: fold c to lower case only while igflag is set */
#define RiteCase(c) (igflag? tolower(c): c)
/* NOTE(review): ToLower simply ORs in 0x20, so it is only a true */
/* tolower for ASCII letters; other characters are altered too. */
#define ToLower(c) (c | 0x20) /* quick and dirty tolower */
#define TRUE 1
#define FALSE 0
/* diagnostic printed when genbuf would overflow */
static char LTLMSG[] = "sed: line too long\n";
static char *spend; /* current end-of-line-buffer pointer */
static long lnum = 0L; /* current source line number */
/* append buffer maintenance */
/* command() stores ACMD and RCMD entries here; readout() drains them */
static sedcmd *appends[MAXAPPENDS]; /* 0-terminated array of queued a and r commands */
static sedcmd **aptr = appends; /* ptr to next free slot in appends[] */
/* genbuf and its pointers */
static char genbuf[GENSIZ]; /* scratch buffer used by dosub() and the x command */
static char *lcomend = genbuf + GENSIZ; /* one past the end of genbuf; not referenced in this file */
static char *loc1; /* start of the most recent match in linebuf */
static char *loc2; /* end of the most recent match (set by advance() at CEOF) */
static char *locs; /* resume point for a repeated (global) match; 0 on a fresh match */
/* input file buffering */
static char inbuffer[MAXIOBUF]; /* handed to setvbuf() as stdin's buffer */
/* command-logic flags */
static int jump; /* jump to cmd's link address if set */
static int delete; /* delete command flag */
static int igflag; /* ignore case in matches */
/* dosub() sets -1/+1 for \l and \u, -2/+2 for \L and \U, 0 for \e or \E; */
/* place() clears the +-1 settings after it has copied one span */
static int cvcase; /* - = tolower, + = toupper */
/* tagged-pattern tracking */
static char *bracend[MAXTAGS]; /* tagged pattern end pointers (set at CKET) */
static char *brastart[MAXTAGS]; /* tagged pattern start pointers (set at CBRA) */
static char *wordbeg[10]; /* starts of $0 (whole line) and words $1 to $9 */
static char *wordend[10]; /* matching one-past-the-end pointers */
static char wordbuf[MAXWBUF]; /* copy of the line that dosub() splits into words */
static char *wordbufend = wordbuf + MAXWBUF; /* one past the end of wordbuf */
static int wordcount; /* words found in wordbuf; -1 means not yet split */
void execute(file)
/* execute the compiled commands in cmds[] on a file */
char *file; /* name of text source file to be filtered */
{
register char *p1, *p2; /* dummy copy ptrs */
register sedcmd *ipc; /* ptr to current command */
char *execp; /* ptr to source */
char *getline(); /* input-getting functions */
void command(), readout();
if (file != NULL) /* filter text from a named file */
if (freopen(file, "r", stdin) == NULL)
fprintf(stderr, "sed: can't open %s\n", file);
setvbuf(stdin, inbuffer, _IOFBF, sizeof(inbuffer));
if (pending) /* there's a command waiting */
{
ipc = pending; /* it will be first executed */
pending = FALSE; /* turn off the waiting flag */
goto doit; /* go to execute it immediately */
}
/* here's the main command-execution loop */
for(;;)
{
/* get next line to filter */
if ((execp = getline(linebuf)) == BAD)
return;
spend = execp;
/* loop through compiled commands, executing them */
for(ipc = cmds; ipc->command; )
{
SetIgflag(ipc->flags.igcase);
cvcase = 0;
/* all no-address commands are selected */
if (ipc->addr1 && !selected(ipc))
{
ipc++;
continue;
}
doit:
command(ipc); /* execute the command pointed at */
if (delete) /* if delete flag is set */
break; /* don't exec rest of compiled cmds */
if (jump) /* if jump set, follow cmd's link */
{
jump = FALSE;
if ((ipc = ipc->u.link) == 0)
{
ipc = cmds;
break;
}
}
else /* normal goto next command */
ipc++;
}
/* we've now done all modification commands on the line */
/* here's where the transformed line is output */
if (!nflag && !delete)
{
for(p1 = linebuf; p1 < spend; p1++)
putc(*p1, stdout);
putc('\n', stdout);
}
/* if we've been set up for append, emit the text from it */
if (aptr > appends)
readout();
delete = FALSE; /* clear delete flag; about to get next cmd */
}
}
int selected(ipc)
/* is current command selected */
sedcmd *ipc;
{
register char *p1 = ipc->addr1; /* point p1 at first address */
register char *p2 = ipc->addr2; /* and p2 at second */
int c;
if (ipc->flags.inrange)
{
if (*p2 == CLNUM)
{
c = p2[1];
if ((c = lnum - linenum[c]) >= 0)
{
ipc->flags.inrange = FALSE;
if (c>0)
return(ipc->flags.allbut);
}
}
else if (*p2 != CEND && match(p2, 0))
ipc->flags.inrange = FALSE;
}
else
{
if (*p1 == CEND)
{
if (!lastline())
return(ipc->flags.allbut);
}
else if (*p1 == CLNUM)
{
c = p1[1];
if (lnum != linenum[c])
{
return(ipc->flags.allbut);
}
}
else if (!match(p1, 0))
return(ipc->flags.allbut);
if (p2)
{
if (*p2 == CEND)
ipc->flags.inrange = TRUE;
else if (*p2 == CLNUM)
{
c = p2[1];
ipc->flags.inrange = (lnum < linenum[c]);
}
else
ipc->flags.inrange = !match(p2,0);
}
}
return(!ipc->flags.allbut);
}
int match(expbuf, gf) /* uses genbuf */
/* match RE at expbuf against linebuf; if gf set, copy linebuf from genbuf */
char *expbuf;
{
register int c;
register char *p1, *p2;
if (gf)
{
if (*expbuf) return(FALSE);
p1 = linebuf; p2 = genbuf;
while (*p1++ = *p2++);
locs = p1 = loc2;
}
else
{
p1 = linebuf;
locs = FALSE;
}
p2 = expbuf;
if (*p2++)
{
loc1 = p1;
if(*p2 == CCHR && p2[1] != RiteCase(*p1))
return(FALSE); /* fail - 1st char is wrong */
return(advance(p1, p2)); /* else try to match rest */
}
/* quick check for 1st character if it's literal */
if (*p2 == CCHR)
{
c = p2[1]; /* pull out character to search for */
p1--;
do {
if (igflag)
while (*++p1 && ToLower(*p1)!=c);
else
while (*++p1 && *p1!=c);
if (*p1 && advance(p1, p2)) /* found it, match the rest */
return(loc1 = p1, 1);
} while (*p1);
return(FALSE); /* didn't find that first char */
}
/* else try for unanchored match of the pattern */
do {
if (advance(p1, p2)) return(loc1 = p1, 1);
} while (*p1++);
/* if got here, didn't match either way */
return(FALSE);
}
int advance(lp, ep)
/*
 * Try to match the compiled RE elements starting at ep against the text
 * starting at lp, consuming one element per loop iteration and recursing
 * for the part that follows a closure. Returns TRUE when the end-of-RE
 * mark (CEOF) is reached, leaving loc2 at the end of the matched text;
 * returns FALSE otherwise.
 *
 * Changes from the earlier version:
 *  - an unknown opcode now fails the match after the diagnostic; before,
 *    ep was rewound onto the same opcode and the loop never ended;
 *  - a starred back-reference to an empty tagged pattern no longer spins
 *    forever on a zero-length memcmp();
 *  - when a closure is followed by a back-reference, the first-character
 *    filter compares raw bytes, as the memcmp() in CBACK does, instead of
 *    case-folding only the text side.
 */
register unsigned char *lp;     /* source (linebuf) ptr */
register unsigned char *ep;     /* regular expression element ptr */
{
    register unsigned char *curlp;  /* text position before a closure */
    int c;                      /* scratch character holder */
    char *bbeg;                 /* start of a back-referenced pattern */
    int ct;                     /* number of characters per match */

    for (;;)
    {
        curlp = lp;             /* save old location for STAR */
        ct = 1;                 /* default to take 1 char back */
        switch (*ep++)
        {
        case CCHR:              /* literal character */
            c = *lp++;
            if (*ep++ == RiteCase(c))   /* if chars are equal */
                continue;               /* matched */
            return(FALSE);

        case CDOT:              /* any character except the ending NUL */
            if (*lp++)
                continue;
            return(FALSE);

        case CNL:               /* both succeed only at the line's NUL */
        case CDOL:
            if (*lp == 0)
                continue;
            return(FALSE);

        case CBWD:              /* begin word */
            if (lp > (unsigned char *) linebuf
                && ((c = *(lp-1)) == '_' || isalnum(c)))
                return(FALSE);
            continue;

        case CEWD:              /* end word */
            if ((c = *lp) == '_' || isalnum(c))
                return(FALSE);
            continue;

        case CEOF:              /* end-of-RE mark */
            loc2 = (char *) lp; /* set second loc */
            return(TRUE);

        /* Character-class elements: the byte after the opcode selects
           whether one character (otherwise) or a whole run (upper-case
           letter) is consumed. */
        case CALF:              /* alphanumeric */
            c = *lp++;
            if (!isalnum(c))
                return(FALSE);
            if (*ep++ == 'A')
                while (isalnum(*lp)) lp++;
            continue;

        case CLET:              /* letter */
            c = *lp++;
            if (!isalpha(c))
                return(FALSE);
            if (*ep++ == 'L')
                while (isalpha(*lp)) lp++;
            continue;

        case CDIG:              /* digit */
            c = *lp++;
            if (!isdigit(c))
                return(FALSE);
            if (*ep++ == 'D')
                while (isdigit(*lp)) lp++;
            continue;

        case CHEX:              /* hexdigit */
            c = *lp++;
            if (!isxdigit(c))
                return(FALSE);
            if (*ep++ == 'H')
                while (isxdigit(*lp)) lp++;
            continue;

        case CSPS:              /* space */
            c = *lp++;
            if (!isspace(c))
                return(FALSE);
            if (*ep++ == 'S')
                while (isspace(*lp)) lp++;
            continue;

        case CCL:               /* character set, 32-byte bitmask follows */
            c = *lp++;
            c = RiteCase(c);
            if (ep[c>>3] & bits[c & 07])    /* is char in set? */
            {
                ep += 32;       /* then skip rest of bitmask */
                continue;
            }
            return(FALSE);

        case CBRA:              /* start of tagged pattern */
            brastart[*ep++] = (char *) lp;
            continue;

        case CKET:              /* end of tagged pattern */
            bracend[*ep++] = (char *) lp;
            continue;

        case CBACK:             /* back-reference to a tagged pattern */
            bbeg = brastart[*ep];
            ct = bracend[*ep++] - bbeg;
            if (memcmp(bbeg, lp, ct))
                return(FALSE);
            lp += ct;
            continue;

        case CBACK|STAR:        /* repeated back-reference */
            bbeg = brastart[*ep];
            ct = bracend[*ep++] - bbeg;
            /* ct > 0: an empty pattern would never advance lp */
            while (ct > 0 && !memcmp(bbeg, lp, ct))
                lp += ct;
            goto star;

        case CDOT|STAR:         /* any run of characters */
            while (*lp) lp++;
            goto star;

        case CCHR|STAR:         /* run of one literal character */
            while (RiteCase(*lp) == *ep) lp++;
            ep++;               /* to start of next element */
            goto star;

        /* Starred classes: a lower-case selector byte backs off one
           character at a time; anything else is all-or-nothing. */
        case CALF|STAR:         /* alphanumeric run */
            while (isalnum(*lp)) lp++;
            if (*ep++ == 'a') goto star;
            goto star2;

        case CLET|STAR:         /* letter run */
            while (isalpha(*lp)) lp++;
            if (*ep++ == 'l') goto star;
            goto star2;

        case CDIG|STAR:         /* digit run */
            while (isdigit(*lp)) lp++;
            if (*ep++ == 'd') goto star;
            goto star2;

        case CHEX|STAR:         /* hex digit run */
            while (isxdigit(*lp)) lp++;
            if (*ep++ == 'h') goto star;
            goto star2;

        case CSPS|STAR:         /* white-space run */
            while (isspace(*lp)) lp++;
            if (*ep++ == 's') goto star;
            goto star2;

        case CCL|STAR:          /* run of characters from a set */
            do {
                c = *lp++;
                c = RiteCase(c);
            } while (ep[c>>3] & bits[c & 07]);
            lp--;               /* un-read the char that ended the run */
            ep += 32;           /* skip past the set */
            goto star;

        star2:
            ct = lp - curlp;    /* either take all or none */
        star:                   /* the recursion part of a closure match */
            if (lp == curlp)    /* 0 matches */
                continue;

            if (*ep == CCHR)
            {
                /* follower is a literal: recurse only where it fits */
                c = ep[1];
                do {
                    if (RiteCase(*lp) != c)
                        continue;
                    if (advance(lp, ep))
                        return(TRUE);
                } while ((lp -= ct) >= curlp);
                return(FALSE);
            }

            if (*ep == CBACK)
            {
                /* follower is a back-reference, compared byte for byte */
                c = (unsigned char) *(brastart[ep[1]]);
                do {
                    if (*lp != c)
                        continue;
                    if (advance(lp, ep))
                        return(TRUE);
                } while ((lp -= ct) >= curlp);
                return(FALSE);
            }

            do {
                if ((char *) lp == locs)    /* don't end a repeat match at locs */
                    break;
                if (advance(lp, ep))
                    return(TRUE);
            } while ((lp -= ct) >= curlp);
            return(FALSE);

        default:
            fprintf(stderr, "sed: RE error, %o\n", *--ep);
            return(FALSE);      /* a corrupt RE cannot match */
        }
    }
}
int substitute(ipc)
/*
 * Carry out an s command on the current line. Returns TRUE when at least
 * one substitution was made and FALSE when the left-hand side did not
 * match at all.
 */
sedcmd *ipc;                    /* ptr to s command struct */
{
    void dosub();

    wordcount = -1;             /* word table must be rebuilt on demand */

    /* no match at all means the command fails */
    if (!match(ipc->u.lhs, 0))
        return(FALSE);
    dosub(ipc->rhs);            /* first (possibly only) replacement */

    /* with the global flag, keep replacing until the line is used up
       or the pattern stops matching */
    if (ipc->flags.global)
    {
        while (*loc2 && match(ipc->u.lhs, 1))
            dosub(ipc->rhs);
    }
    return(TRUE);
}
void dosub(rhsbuf)              /* uses linebuf, genbuf, spend */
/*
 * Build the result of one substitution. The new line is assembled in
 * genbuf from the text before the match (linebuf up to loc1), the expanded
 * replacement template rhsbuf, and the text after the match (from loc2).
 * It is then copied back to linebuf; spend and loc2 are updated so that
 * loc2 points just past the inserted text.
 *
 * Template syntax handled here:
 *   &                      the matched text
 *   \1 ... (MAXTAGS tags)  a tagged sub-pattern
 *   \l \L \u \U \e \E      set the case-conversion mode used by place()
 *   $0 ... $9              the whole line / a white-space separated word
 *   \x (other)             the character x itself
 *
 * If the result does not fit in genbuf the "line too long" message is
 * printed and the program exits with status 2. The earlier version printed
 * the message but kept writing beyond the end of the buffer.
 */
char *rhsbuf;                   /* replacement template to expand */
{
    register char *lp, *sp, *rp, *wp, *tp;
    int c, c1;
    char *place();

    /* copy linebuf to genbuf up to location 1 */
    lp = linebuf;
    sp = genbuf;
    while (lp < loc1)
        *sp++ = *lp++;

    for (rp = rhsbuf; (c = *rp++) != 0; )
    {
        if (c == '&')
        {
            sp = place(sp, loc1, loc2);
            continue;
        }
        else if (c == '\\')
        {
            c1 = 1;             /* cleared if the escape is just a literal */
            switch ((c = *rp++))
            {
            case 'l': cvcase = -1; break;
            case 'L': cvcase = -2; break;
            case 'u': cvcase = 1; break;
            case 'U': cvcase = 2; break;
            case 'e':
            case 'E': cvcase = 0; break;
            default:
                if (c >= '1' && c < MAXTAGS+'1')
                    sp = place(sp, brastart[c-'1'], bracend[c-'1']);
                else
                    c1 = 0;
            }
            if (c1)
                continue;
        }
        else if (c == '$' && isdigit((unsigned char) *rp))
        {
            if (wordcount < 0)
            {
                /* first $n of this s command: copy the line into wordbuf
                   and record where each word begins and ends */
                wordbeg[0] = wp = wordbuf;
                wordcount = 0;
                for (tp = linebuf; wp < wordbufend && (*wp++ = *tp++); )
                    continue;
                *(wordbufend-1) = '\0';
                wordend[0] = wp-1;
                wp = wordbuf;
                while (*wp)
                {
                    while ((c = *wp) && (c == ' ' || c == '\t' || c == '\n'))
                        wp++;
                    if (!c)
                        break;
                    if (++wordcount < 10)
                        wordbeg[wordcount] = wp;
                    while ((c = *wp) && c != ' ' && c != '\t' && c != '\n')
                        wp++;
                    if (wordcount < 10)
                        wordend[wordcount] = wp;
                }
            }
            c1 = *rp++ - '0';
            if (c1 <= wordcount)    /* a missing word expands to nothing */
                sp = place(sp, wordbeg[c1], wordend[c1]);
            continue;
        }

        *sp++ = c;
        if (sp >= genbuf + GENSIZ)
        {
            fputs(LTLMSG, stderr);
            exit(2);
        }
    }

    lp = loc2;
    /* same offset as sp, but inside linebuf */
    loc2 = linebuf + (sp - genbuf);

    /* append the unmatched remainder of the line */
    while ((*sp++ = *lp++) != 0)
        if (sp >= genbuf + GENSIZ)
        {
            fputs(LTLMSG, stderr);
            exit(2);
        }

    /* copy the finished line back to linebuf */
    lp = linebuf;
    sp = genbuf;
    while ((*lp++ = *sp++) != 0)
        continue;
    spend = lp-1;
}
char *place(asp, al1, al2)      /* uses genbuf */
/*
 * Copy the characters al1 ... al2-1 to asp (a position inside genbuf),
 * converting each to lower case when cvcase is negative or to upper case
 * when it is positive. The one-shot modes (cvcase of 1 or -1) are cleared
 * once the whole span has been copied. Returns the position just past the
 * last character written.
 *
 * If genbuf fills up, the "line too long" message is printed and the
 * program exits with status 2. The earlier version printed the message
 * but went on writing past the end of the buffer.
 */
register char *asp, *al1, *al2;
{
    while (al1 < al2)
    {
        *asp = *al1++;
        /* <ctype.h> functions need an unsigned char value */
        if (cvcase < 0)
            *asp = tolower((unsigned char) *asp);
        else if (cvcase > 0)
            *asp = toupper((unsigned char) *asp);
        if (++asp >= genbuf + GENSIZ)
        {
            fputs(LTLMSG, stderr);
            exit(2);
        }
    }
    if (cvcase == 1 || cvcase == -1)
        cvcase = 0;
    return(asp);
}
void listto(p1, fp)
/*
 * Write the NUL-terminated string at p1 to fp in unambiguous form,
 * followed by a newline. Printable characters are written as they are.
 * Every other character is written as a backslash followed by b, t, n,
 * r or e for BS, TAB, NL, CR and ESC, or by two hex digits otherwise.
 *
 * The earlier loop tested *p1++ after stepping p1 back by one, so it read
 * the byte in front of the string and also printed the terminating NUL
 * as \00.
 */
register char *p1;              /* the source */
FILE *fp;                       /* output stream to write to */
{
    int ch;                     /* current character as an unsigned value */

    for (; *p1 != '\0'; p1++)
    {
        ch = (unsigned char) *p1;
        if (isprint(ch))
        {
            putc(ch, fp);       /* pass it through */
            continue;
        }
        putc('\134', fp);       /* emit a backslash */
        switch (ch)
        {
        case '\10': putc('b', fp); break;   /* BS */
        case '\11': putc('t', fp); break;   /* TAB */
        case '\12': putc('n', fp); break;   /* NL */
        case '\15': putc('r', fp); break;   /* CR */
        case '\33': putc('e', fp); break;   /* ESC */
        default: fprintf(fp, "%02x", ch);
        }
    }
    putc('\n', fp);
}
void command(ipc)
/* execute compiled command pointed at by ipc */
sedcmd *ipc;
{
static int didsub; /* true if last s succeeded */
static char holdsp[MAXHOLD]; /* the hold space */
static char *hspend = holdsp; /* hold space end pointer */
register char *p1, *p2, *p3;
unsigned char *y1, *y2;
register int i;
char *execp, *getline();
void readout();
switch(ipc->command)
{
case ACMD: /* append */
*aptr++ = ipc;
if (aptr >= appends + MAXAPPENDS)
fprintf(stderr,
"sed: too many appends after line %ld\n",
lnum);
*aptr = 0;
break;
case CCMD: /* change pattern space */
delete = TRUE;
if (!ipc->flags.inrange || lastline())
printf("%s\n", ipc->u.lhs);
break;
case DCMD: /* delete pattern space */
delete++;
break;
case CDCMD: /* delete a line in hold space */
p1 = p2 = linebuf;
while(*p1 != '\n')
if (delete = (*p1++ == 0))
return;
p1++;
while(*p2++ = *p1++) continue;
spend = p2-1;
jump++;
break;
case EQCMD: /* show current line number */
fprintf(stdout, "%ld\n", lnum);
break;
case GCMD: /* copy hold space to pattern space */
p1 = linebuf; p2 = holdsp; while(*p1++ = *p2++);
spend = p1-1;
break;
case CGCMD: /* append hold space to pattern space */
*spend++ = '\n';
p1 = spend; p2 = holdsp;
while(*p1++ = *p2++)
if (p1 >= linebuf + MAXBUF)
break;
spend = p1-1;
break;
case HCMD: /* copy pattern space to hold space */
p1 = holdsp; p2 = linebuf; while(*p1++ = *p2++);
hspend = p1-1;
break;
case CHCMD: /* append pattern space to hold space */
*hspend++ = '\n';
p1 = hspend; p2 = linebuf;
while(*p1++ = *p2++)
if (p1 >= holdsp + MAXBUF)
break;
hspend = p1-1;
break;
case ICMD: /* insert text */
puts(ipc->u.lhs);
break;
case BCMD: /* branch to label */
jump = TRUE;
break;
case LCMD: /* list text */
listto(linebuf, (ipc->fout != NULL)?ipc->fout:stdout); break;
case NCMD: /* read next line into pattern space */
if (!nflag)
puts(linebuf); /* flush out the current line */
if (aptr > appends)
readout(); /* do pending a, r commands */
if ((execp = getline(linebuf)) == BAD)
{
pending = ipc;
delete = TRUE;
break;
}
spend = execp;
break;
case CNCMD: /* append next line to pattern space */
if (aptr > appends)
readout();
*spend++ = '\n';
if ((execp = getline(spend)) == BAD)
{
pending = ipc;
delete = TRUE;
break;
}
spend = execp;
break;
case PCMD: /* print pattern space */
puts(linebuf);
break;
case CPCMD: /* print one line from pattern space */
cpcom: /* so s command can jump here */
for(p1 = linebuf; *p1 != '\n' && *p1 != '\0'; )
putc(*p1++, stdout);
putc('\n', stdout);
break;
case QCMD: /* quit the stream editor */
if (!nflag)
puts(linebuf); /* flush out the current line */
if (aptr > appends)
readout(); /* do any pending a and r commands */
exit(0);
case RCMD: /* read a file into the stream */
*aptr++ = ipc;
if (aptr >= appends + MAXAPPENDS)
fprintf(stderr,
"sed: too many reads after line %ld\n",
lnum);
*aptr = 0;
break;
case SCMD: /* substitute RE */
didsub = substitute(ipc);
if (ipc->flags.print && didsub)
if (ipc->flags.print == TRUE)
puts(linebuf);
else
goto cpcom;
if (didsub && ipc->fout)
{
fputs(linebuf,ipc->fout);
putc('\n',ipc->fout);
}
break;
case TCMD: /* branch on last s successful */
case CTCMD: /* branch on last s failed */
if (didsub == (ipc->command == CTCMD))
break; /* no branch if last s failed, else */
didsub = FALSE;
jump = TRUE; /* set up to jump to assoc'd label */
break;
case CWCMD: /* write one line from pattern space */
for(p1 = linebuf; *p1 != '\n' && *p1 != '\0'; )
putc(*p1++, ipc->fout);
putc('\n', ipc->fout);
break;
case WCMD: /* write pattern space to file */
fputs(linebuf, ipc->fout);
putc('\n', ipc->fout);
break;
case XCMD: /* exchange pattern and hold spaces */
p1 = linebuf; p2 = genbuf; while(*p2++ = *p1++) continue;
p1 = holdsp; p2 = linebuf; while(*p2++ = *p1++) continue;
spend = p2 - 1;
p1 = genbuf; p2 = holdsp; while(*p2++ = *p1++) continue;
hspend = p2 - 1;
break;
case YCMD:
y1 = linebuf; y2 = ipc->u.lhs;
while(*y1 = y2[*y1])
y1++;
break;
}
}
char *getline(buf)
/* get next line of text to be filtered */
register char *buf; /* where to send the input */
{
if (gets(buf) != NULL)
{
lnum++; /* note that we got another line */
while(*buf++); /* find the end of the input */
return(--buf); /* return ptr to terminating null */
}
else
return(BAD);
}
int lastline()
/*
 * Report whether the line just read is the last line of all input:
 * TRUE only when eargc is zero and stdin has nothing further to deliver.
 * A character read while probing is pushed back with ungetc().
 */
{
    register int peeked;

    if (eargc != 0)
        return(FALSE);

    peeked = getc(stdin);
    if (peeked != EOF)
    {
        ungetc(peeked, stdin);
        return(FALSE);
    }
    return(TRUE);
}
void readout()
/*
 * Emit everything queued in appends[] to standard output, in order, then
 * empty the queue. An ACMD entry contributes its text followed by a
 * newline; any other entry names a file whose contents are copied through
 * byte by byte. A file that cannot be opened is skipped silently.
 */
{
    register int ch;            /* input char or EOF */
    FILE *src;                  /* file named by the queued command */

    for (aptr = appends; *aptr != 0; aptr++)
    {
        if ((*aptr)->command == ACMD)       /* process "a" cmd */
        {
            puts((*aptr)->u.lhs);
            continue;
        }

        /* process "r" cmd */
        src = fopen((*aptr)->u.lhs, "r");
        if (src == NULL)
            continue;
        while ((ch = getc(src)) != EOF)
            putc((char) ch, stdout);
        fclose(src);
    }

    aptr = appends;             /* reset the append ptr */
    *aptr = 0;
}
/* sedexec.c ends here */